Setting libraries and workk dir

library(mixOmics)
## Loading required package: MASS
## Loading required package: lattice
## Loading required package: ggplot2
## 
## Loaded mixOmics 6.26.0
## Thank you for using mixOmics!
## Tutorials: http://mixomics.org
## Bookdown vignette: https://mixomicsteam.github.io/Bookdown
## Questions, issues: Follow the prompts at http://mixomics.org/contact-us
## Cite us:  citation('mixOmics')
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:MASS':
## 
##     select
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tibble)
library(janitor)
## 
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(fs)
library(condvis)


experiment <- list()
workdir <- path("/Users/kiselev/Documents/multiomics/mixOmcis_groups")

#read sample names file
meta <- read.table(paste0(workdir, "/meta.csv"), header = TRUE, sep = ",")
colnames(meta) <- c("proteo", "metabo", "group")
groups <- as.factor(meta$group)

Read metabolomics data and try different normalization methods

metabo <- read.table(paste0(workdir, "/metabo.csv"), header = TRUE, sep = ",", row.names = 1, check.names = FALSE)

#Order columns according to meta file
metabo <- metabo[, meta$metabo] %>% t()

#normalize data by the total sum of each sample
metabo_sum <- metabo / rowSums(metabo)

#Log normalize data
metabo_log <- log2(metabo + 1)

#Log normalize data
metabo_sum_log <- log2(metabo_sum + 1)



#TODO: Eigen normalization https://rdrr.io/github/idrblab/NOREVA/man/EIGENMS.html

metabo.pca <- pca(metabo, ncomp = 2, scale = TRUE, center = TRUE)
plotIndiv(metabo.pca, ind.names = FALSE,
          title = "PCA: metabo non-normalized",
          group=groups,
          pch = as.numeric(factor(groups)),
          pch.levels =groups,
          legend = TRUE)

# create a vector of 8 colors
colors <- c("blue", "orange", "grey","green4", "red", "purple", "black", "yellow", "pink")

#Heatmap
cim(metabo.pca, row.sideColors = colors[groups],
    title = "Heatmap: metabo non-normalized")

##Metabo sum noralized
metabo.pca <- pca(metabo_sum, ncomp = 2, scale = TRUE, center = TRUE)
plotIndiv(metabo.pca, ind.names = FALSE,
          title = "PCA: metabo sum-normalized",
          group=groups,
          pch = as.numeric(factor(groups)),
          pch.levels =groups,
          legend = TRUE)

#Heatmap
cim(metabo.pca, row.sideColors = colors[groups],
    title = "Heatmap: metabo sum-normalized", margins=c(2,2))

##Metabo sum noralized
metabo.pca <- pca(metabo_log, ncomp = 2, scale = TRUE, center = TRUE)
plotIndiv(metabo.pca, ind.names = FALSE,
          title = "PCA: metabo log-normalized",
          group=groups,
          pch = as.numeric(factor(groups)),
          pch.levels =groups,
          legend = TRUE)

#Heatmap
cim(metabo.pca, row.sideColors = colors[groups],
    title = "Heatmap: metabo log-normalized")

##Metabo sum log noralized
metabo.pca <- pca(metabo_sum_log, ncomp = 2, scale = TRUE, center = TRUE)
plotIndiv(metabo.pca, ind.names = FALSE,
          title = "PCA: metabo sum log-normalized",
          group=groups,
          pch = as.numeric(factor(groups)),
          pch.levels =groups,
          legend = TRUE)

#Heatmap
cim(metabo.pca, row.sideColors = colors[groups],
    title = "Heatmap: metabo log-normalized")

Continue with metabo_log normalization

metabo.pca <- pca(metabo_log, ncomp = 3)
plotIndiv(metabo.pca,
          group=groups,
          title = "PCA: Metabo log-normalized",
          legend = TRUE)

metabo.plsda <- plsda(metabo_log, groups, ncomp = 3)
plotIndiv(metabo.plsda,
          title = "PLS-DA: Metabo log-normalized",
          legend = TRUE)

#Count NA values
metabo.splsda <- splsda(metabo_log, groups, ncomp = 3, keepX = c(10, 10, 10))
plotIndiv(metabo.splsda,
          title = "sPLS-DA: Metabo log-normalized, keepX = 10",
          comp = c(1, 2),
          legend = TRUE,
          col.per.group = colors)

?plotIndiv

plotVar(metabo.splsda)

cim(metabo.splsda, row.sideColors = colors[groups],
            title = "Heatmap: metabo splsda keepX = 10",
            legend = list(levels(groups)))
## Error in cim plot: figure margins too large. See ?cim for help.

Integrate with proteomics data

proteo <- read.table(paste0(workdir, "/M66_50_intotal_afterimputation_withappBBS_BBS.txt"), header = TRUE, sep = "\t", check.names = FALSE, dec = ",")
proteo <- proteo %>% 
  column_to_rownames("T: Genes") %>%
  select(-c('T: Protein.Group', 
  'T: Protein.Ids', 
  'T: Protein.Names', 
  'T: First.Protein.Description')) %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column("proteo") %>%
  inner_join(meta, by = "proteo") %>%
  column_to_rownames("metabo") %>%
  select(-c('proteo', 'group'))

dim(proteo)
## [1]  156 1360
proteo.integrate <- proteo[intersect(rownames(proteo), rownames(metabo_log)), ]
metabo_log.integrate <- metabo_log[intersect(rownames(proteo), rownames(metabo_log)), ]

block pls

groups.integrate <- as.factor(meta[meta$metabo %in% rownames(proteo.integrate), ]$group)

proteo.metabo.pls <- pls(metabo_log.integrate, proteo.integrate)
plotIndiv(proteo.metabo.pls, ind.names = FALSE,
          title = "PLS: Metabo log-normalized and Proteo",
          group=groups.integrate,
          pch = as.numeric(factor(groups.integrate)),
          pch.levels =groups.integrate,
          legend = TRUE)

omics <- list(metabo = metabo_log.integrate, proteo = proteo.integrate)


proteo.metabo.block.plsda <- block.splsda(omics, groups.integrate, keepX = list(metabo = c(20, 20), proteo = c(50,50))) # run the method
plotIndiv(proteo.metabo.block.plsda) # plot the samples

cimDiablo(proteo.metabo.block.plsda, comp = 1, margins = c(7,7))
## 
## trimming values to [-3, 3] range for cim visualisation. See 'trim' arg in ?cimDiablo

proteo.metabo.block.plsda <- block.splsda(omics, groups.integrate, keepX = list(metabo = c(20, 20), proteo = c(30,30))) # run the method
plotIndiv(proteo.metabo.block.plsda) # plot the samples

cimDiablo(proteo.metabo.block.plsda, comp = 1, margins = c(7,7))
## 
## trimming values to [-3, 3] range for cim visualisation. See 'trim' arg in ?cimDiablo

proteo.metabo.block.plsda <- block.splsda(omics, groups.integrate, keepX = list(metabo = c(10, 10), proteo = c(30,30))) # run the method
plotIndiv(proteo.metabo.block.plsda) # plot the samples

cimDiablo(proteo.metabo.block.plsda, comp = 1, margins = c(7,7))
## 
## trimming values to [-3, 3] range for cim visualisation. See 'trim' arg in ?cimDiablo

proteo.metabo.block.plsda <- block.splsda(omics, groups.integrate, keepX = list(metabo = c(10, 10), proteo = c(20,20))) # run the method
plotIndiv(proteo.metabo.block.plsda) # plot the samples

cimDiablo(proteo.metabo.block.plsda, comp = 1, margins = c(7,7))
## 
## trimming values to [-3, 3] range for cim visualisation. See 'trim' arg in ?cimDiablo